# Stock price & SBTi verification among S&P 500 companies
# Author: Inhwan Ko (University of Washington)
# Date: Jan 30, 2023

pacman::p_load(MASS, readxl, tidyverse, wfe, lmtest, sandwich)

# Main dataset: stock price
# source: COMPUSTAT: Stock price monthly

stockprice <- read_excel("stock_price_monthly.xlsx")
stockprice <- stockprice[,c(4,3,5)]
colnames(stockprice) <- c("tic","date","price")

stockprice$month <- stockprice$year <- NA
for (i in 1:nrow(stockprice)){
  stockprice$year[i] <- str_split(stockprice$date[i], pattern="-")[[1]][1]
  stockprice$month[i] <- str_split(stockprice$date[i], pattern="-")[[1]][2]
}
stockprice <- stockprice %>% 
  # need to change into quarterly because of control variables
  filter(month=="03" | month=="06" | month=="09" | month=="12") %>% 
  select(tic, year, month, price)

unique(stockprice$tic) %>% length()

# 2nd dataset: SBTi verification status data

sbti <- read_excel("S&P500_sbti_list.xlsx")
sbti$month <- sbti$year <- NA
for (i in 1:nrow(sbti)) {
  sbti$year[i] <- str_split(sbti$Date[i], pattern="/")[[1]][3]
  sbti$month[i] <- str_split(sbti$Date[i], pattern="/")[[1]][1]  
}

sbti <- sbti %>% 
  select(TIC, year, month, NearTermTargetStatus, NearTermTargetClass,LongTermTargetStatus)

colnames(sbti) <- c("tic","year","month","nearterm_status","nearterm_target","longterm_status")

sbti$longterm <- sbti$near_2c <- sbti$near_wb2c <- sbti$near_1.5c <- sbti$comm <- 0

for (i in 1:nrow(sbti)) {
  sbti$comm[i] <- ifelse(sbti[i,4]=="Committed", 1, 0)
  sbti$near_1.5c[i] <- ifelse(sbti[i,4]=="Targets Set" & sbti[i,5]=="1.5°C", 1, 0)
  sbti$near_wb2c[i] <- ifelse(sbti[i,4]=="Targets Set" & sbti[i,5]=="Well-below 2°C", 1, 0)
  sbti$near_2c[i] <- ifelse(sbti[i,4]=="Targets Set" & sbti[i,5]=="2°C", 1, 0)
  sbti$longterm[i] <- ifelse(sbti[i,6]=="Targets Set", 1, 0)
}

sbti$month[sbti$month=="01"] <- "03"

# units are 500
# periods are 2010-2022 Q3, so 12*4 + 1*3 = 51
# 51 X 500 is what we want

data <- data.frame(tic=rep(NA, 51*500),year=rep(NA, 51*500),month=rep(NA, 51*500))
data$tic <- rep(unique(sbti$tic),51)
data$year <- c(rep("2010",2000),rep("2011",2000),rep("2012",2000),rep("2013",2000),
               rep("2014",2000),rep("2015",2000),rep("2016",2000),rep("2017",2000),
               rep("2018",2000),rep("2019",2000),rep("2020",2000),rep("2021",2000),
               rep("2022",1500))
data <- data %>% arrange(-desc(year), -desc(tic))
data$month <- c(rep(c("03","06","09","12"), 500*12) , rep(c("03","06","09"),500))
data <- data %>% arrange(-desc(tic),-desc(year))

data <- left_join(data, stockprice, by=c("tic","year","month"))
data <- left_join(data, sbti, by=c("tic","year","month"))

colnames(data)
data$comm[is.na(data$comm)] <- 0
data$near_1.5c[is.na(data$near_1.5c)] <- 0
data$near_wb2c[is.na(data$near_wb2c)] <- 0
data$near_2c[is.na(data$near_2c)] <- 0
data$longterm[is.na(data$longterm)] <- 0

for (i in 2:nrow(data)) {
  data$comm[i][data$tic[i]==data$tic[i-1] & data$comm[i-1]==1] <- 1
  data$near_1.5c[i][data$tic[i]==data$tic[i-1] & data$near_1.5c[i-1]==1] <- 1
  data$near_wb2c[i][data$tic[i]==data$tic[i-1] & data$near_wb2c[i-1]==1] <- 1
  data$near_2c[i][data$tic[i]==data$tic[i-1] & data$near_2c[i-1]==1] <- 1
  data$longterm[i][data$tic[i]==data$tic[i-1] & data$longterm[i-1]==1] <- 1
}

data <- data[,c(1,2,3,4,8,9,10,11,12)]
data$quarter[data$month=="03"] <- "1"
data$quarter[data$month=="06"] <- "2"
data$quarter[data$month=="09"] <- "3"
data$quarter[data$month=="12"] <- "4"

data$yq <- paste(data$year, "Q", data$quarter, sep="")
data <- data[,-10]

# 3rd dataset: controls

ctrl <- read_excel("controls.xlsx")
colnames(ctrl)
ctrl <- ctrl[,c(9,14,16,20,17,24,22,23,25,29)]
colnames(ctrl) <- c("tic","yq","asset","intangible",
                    "cash","liabilities","long_invest","short_invest","netincome",
                    "industrycode")

ctrl <- ctrl[-5169,] # duplicate 2020Q2 entry for LHX 

finaldata <- left_join(data, ctrl, by=c("tic","yq"))

write.csv(finaldata, "finaldata.csv")
# check the data

unique(finaldata$tic) %>% length
unique(finaldata$yq) %>% length

# refine the variables

finaldata$price_log <- log(finaldata$price + 1)
finaldata$asset_log <- log(finaldata$asset + 1)
finaldata$intan_prop <- 100* finaldata$intangible / finaldata$asset
finaldata$cash_log <- log(finaldata$cash + 1)
finaldata$liab_log <- log(finaldata$liabilities + 1)
finaldata$sinvest_log <- log(finaldata$short_invest + 1)
finaldata$linvest_log <- log(finaldata$long_invest + 1)
finaldata$industrycode_f <- as.factor(finaldata$industrycode)
finaldata$year_f <- as.factor(finaldata$year)

# make a panel data easily
yq <- data.frame(period=1:51, yq=unique(finaldata$yq))
finaldata <- left_join(finaldata,yq, by="yq")

## creating lagged variables
finaldata$netincome_d <- finaldata$netincome_l <- finaldata$price_l <- NA

for (i in 2:nrow(finaldata)){
    finaldata$netincome_l[i][finaldata$tic[i]==finaldata$tic[i-1]] <- 
      finaldata$netincome[i-1]
    finaldata$price_l[i][finaldata$tic[i]==finaldata$tic[i-1]] <- 
      finaldata$price[i-1]
}

finaldata$netincome_d <- 100* (finaldata$netincome - finaldata$netincome_l) / finaldata$netincome_l
finaldata$netincome_d <- round(finaldata$netincome_d, 3)
finaldata[finaldata==Inf] <- NA

finaldata$committed <- ifelse(
  finaldata$comm==1 | finaldata$near_1.5c ==1 | 
    finaldata$near_wb2c==1 | finaldata$near_2c==1,
  1, 0
)

committers_list <- unique(finaldata$tic[finaldata$committed==1])
finaldata$committers <- ifelse(finaldata$tic %in% committers_list, 1, 0)

finaldata$verified <- ifelse(
  finaldata$near_1.5c ==1 | finaldata$near_wb2c==1 | finaldata$near_2c==1,
  1, 0
)
verifiers_list <- unique(finaldata$tic[finaldata$verified==1])
finaldata$verifiers <- ifelse(finaldata$tic %in% verifiers_list, 1, 0)


# weighted two-way fixed effects model
colnames(finaldata)

## Analysis 1: Full sample (SBTi group vs non-group)
fullsample <- finaldata

### baseline model
formula1_1 <- price ~ committed + asset_log + cash_log + liab_log + sinvest_log +
  linvest_log + netincome_d + industrycode_f
wfe1_1 <- wfe(formula1_1, fullsample,
            treat="committed", unit.index="tic", time.index="period",
            method="unit", estimator="did", unweighted=F)
summary(wfe1_1)

### AR(1) model
formula1_2 <- price ~ committed + price_l + asset_log + cash_log + liab_log + sinvest_log +
  linvest_log + netincome_d + industrycode_f
wfe1_2 <- wfe(formula1_2, fullsample,
              treat="committed", unit.index="tic", time.index="period",
              method="unit", estimator="did", unweighted=F)
summary(wfe1_2) 
 
### AR(1) model + time cubic
formula1_3 <- price ~ committed + price_l + asset_log + cash_log + liab_log + sinvest_log +
  linvest_log + netincome_d + industrycode_f + period + I(period^2) + I(period^3)
wfe1_3 <- wfe(formula1_3, fullsample,
              treat="committed", unit.index="tic", time.index="period",
              method="unit", estimator="did", unweighted=F)
summary(wfe1_3)


## Analysis 2: SBTi group (verified vs non-verified)
sbtisample <- finaldata %>% filter(committers==1)

### baseline model
formula2_1 <- price ~ verified + asset_log + cash_log + liab_log + sinvest_log +
  linvest_log + netincome_d + industrycode_f
wfe2_1 <- wfe(formula2_1, sbtisample,
              treat="verified", unit.index="tic", time.index="period",
              method="unit", estimator="did", unweighted=F)
summary(wfe2_1)

### AR(1) model
formula2_2 <- price ~ verified + price_l + asset_log + cash_log + liab_log + sinvest_log +
  linvest_log + netincome_d + industrycode_f
wfe2_2 <- wfe(formula2_2, sbtisample,
              treat="verified", unit.index="tic", time.index="period",
              method="unit", estimator="did", unweighted=F)
summary(wfe2_2)

### AR(1) model + time cubic
formula2_3 <- price ~ verified + price_l + asset_log + cash_log + liab_log + sinvest_log +
  linvest_log + netincome_d + industrycode_f + period + I(period^2) + I(period^3)
wfe2_3 <- wfe(formula2_3, sbtisample,
              treat="verified", unit.index="tic", time.index="period",
              method="unit", estimator="did", unweighted=F)
summary(wfe2_3)


## Analysis 3: Verified group (1.5c vs other targets)
verisample <- finaldata %>% filter(verifiers==1)

### baseline model
formula3_1 <- price ~ near_1.5c + asset_log + cash_log + liab_log + sinvest_log +
  linvest_log + netincome_d + industrycode_f
wfe3_1 <- wfe(formula3_1, verisample,
              treat="near_1.5c", unit.index="tic", time.index="period",
              method="unit", estimator="did", unweighted=F)
summary(wfe3_1)

### AR(1) model
formula3_2 <- price ~ near_1.5c + price_l + asset_log + cash_log + liab_log + sinvest_log +
  linvest_log + netincome_d
wfe3_2 <- wfe(formula3_2, verisample,
              treat="near_1.5c", unit.index="tic", time.index="period",
              method="unit", estimator="did", unweighted=F)
summary(wfe3_2)

### AR(1) model + time cubic
formula3_3 <- price ~ near_1.5c + price_l + asset_log + cash_log + liab_log + sinvest_log +
  linvest_log + netincome_d + period + I(period^2) + I(period^3)
wfe3_3 <- wfe(formula3_3, verisample,
              treat="near_1.5c", unit.index="tic", time.index="period",
              method="unit", estimator="did", unweighted=F)
summary(wfe3_3)


